# Load the business table. str_sub(x, 2, -2) drops the first and the last
# character of `name` and `address` (presumably wrapping quote characters --
# TODO confirm against the raw CSV).
business <- read_csv("./data/business.csv") %>%
  mutate(
    name = str_sub(name, 2, -2),
    address = str_sub(address, 2, -2)
  )
## Parsed with column specification:
## cols(
## business_id = col_character(),
## name = col_character(),
## neighborhood = col_character(),
## address = col_character(),
## city = col_character(),
## state = col_character(),
## postal_code = col_integer(),
## latitude = col_double(),
## longitude = col_double(),
## stars = col_double(),
## review_count = col_integer(),
## is_open = col_integer(),
## categories = col_character()
## )
# Build a long table with one row per (business_id, category) pair.
# `categories` is a ";"-delimited string. separate_rows() splits it into
# however many entries a business has, so nothing is truncated: a fixed
# 25-column separate() discards the overflow for any business listing more
# than 25 categories. It also avoids generating NA padding rows for
# businesses with fewer entries.
categories <- business %>%
  select(business_id, category = categories) %>%
  separate_rows(category, sep = ";")
## Warning: Expected 25 pieces. Additional pieces discarded in 1 rows [6851].
## Warning: Expected 25 pieces. Missing pieces filled with `NA` in 26774
## rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
## 20, ...].
# Ids of every business tagged with the "Restaurants" or "Food" category.
restaurant_ids <- categories %>%
  filter(category %in% c("Restaurants", "Food")) %>%
  distinct(business_id)

# Keep only those businesses. A left_join() against the id list would retain
# every row of `business` (the id table carries no column besides the key, so
# it neither filters nor adds anything); a filtering join is what restricts
# the table to restaurants.
restaurants <- business %>%
  semi_join(restaurant_ids, by = "business_id") %>%
  select(-categories)

# Drop the intermediates that are no longer needed.
rm(restaurant_ids, business)
# Load the attributes table, normalise its column names with
# janitor::clean_names(), and keep only the join key and the `alcohol` column.
# NOTE(review): the name `attributes` masks base::attributes() in this scope.
attributes <- read_csv("./data/attributes.csv") %>%
  janitor::clean_names() %>%
  select(business_id, alcohol)
## Parsed with column specification:
## cols(
## .default = col_character()
## )
## See spec(...) for full column specifications.
# Attach the `alcohol` attribute to each row; rows without a matching
# business_id in `attributes` are kept and get NA.
restaurants <- left_join(restaurants, attributes, by = "business_id")
Center of Las Vegas: 36.1699° N, 115.1398° W. The Plotly map displayed below covers the area within 0.5 degrees of latitude and longitude of this center.
# Interactive scatter plot of the rows located strictly inside the box
# 35.6699 < latitude < 36.6699 and -115.6398 < longitude < -114.6398,
# one marker per row, coloured by star rating.
restaurants %>%
  filter(
    latitude > 35.6699, latitude < 36.6699,
    longitude > -115.6398, longitude < -114.6398
  ) %>%
  plot_ly(
    x = ~longitude,
    y = ~latitude,
    color = ~stars,
    type = "scatter",
    mode = "markers",
    alpha = 0.5
  ) %>%
  layout(
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )